{
 "cells": [
  {
   "attachments": {},
   "cell_type": "markdown",
   "metadata": {
    "id": "QgumzOZ5wgec",
    "pycharm": {
     "name": "#%% md\n"
    }
   },
   "source": [
    "# Model Monitoring with Evidently and MLFlow"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "id": "rByuPhg7wgei",
    "pycharm": {
     "name": "#%%\n"
    }
   },
   "outputs": [],
   "source": [
    "%load_ext autoreload\n",
    "%autoreload 2\n",
    "\n",
    "import joblib\n",
    "import pandas as pd\n",
    "import mlflow\n",
    "import mlflow.sklearn\n",
    "from mlflow.tracking import MlflowClient\n",
    "\n",
    "from evidently.pipeline.column_mapping import ColumnMapping\n",
    "\n",
    "from src.reports import (\n",
    "    build_model_monitoring_report,\n",
    "    get_model_monitoring_metrics\n",
    ")\n",
    "\n",
    "from config import MLFLOW_TRACKING_URI, DATA_DIR, FILENAME, REPORTS_DIR"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "id": "HiiUl3p8wgej",
    "pycharm": {
     "name": "#%%\n"
    }
   },
   "outputs": [],
   "source": [
    "import warnings\n",
    "warnings.filterwarnings('ignore')\n",
    "warnings.simplefilter('ignore')"
   ]
  },
  {
   "attachments": {},
   "cell_type": "markdown",
   "metadata": {
    "id": "zw5Tap_Xwgej",
    "pycharm": {
     "name": "#%% md\n"
    }
   },
   "source": [
    "## Load Data"
   ]
  },
  {
   "attachments": {},
   "cell_type": "markdown",
   "metadata": {
    "id": "VqGH1Mr6wgej",
    "pycharm": {
     "name": "#%% md\n"
    }
   },
   "source": [
    "More information about the dataset can be found in UCI machine learning repository: https://archive.ics.uci.edu/ml/datasets/bike+sharing+dataset\n",
    "\n",
    "Acknowledgement: Fanaee-T, Hadi, and Gama, Joao, 'Event labeling combining ensemble detectors and background knowledge', Progress in Artificial Intelligence (2013): pp. 1-15, Springer Berlin Heidelberg"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "id": "dKX2YV19wgek",
    "pycharm": {
     "name": "#%%\n"
    }
   },
   "outputs": [],
   "source": [
    "# Download original dataset with: python src/pipelines/load_data.py \n",
    "raw_data = pd.read_csv(f\"../{DATA_DIR}/{FILENAME}\")\n",
    "\n",
    "# Set datetime index \n",
    "raw_data = raw_data.set_index('dteday')\n",
    "\n",
    "\n",
    "raw_data.head()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Define column mapping"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "target = 'cnt'\n",
    "prediction = 'prediction'\n",
    "datetime = 'dteday'\n",
    "numerical_features = ['temp', 'atemp', 'hum', 'windspeed', 'mnth', 'hr', 'weekday']\n",
    "categorical_features = ['season', 'holiday', 'workingday', ]\n",
    "FEATURE_COLUMNS = numerical_features + categorical_features\n",
    "\n",
    "column_mapping = ColumnMapping()\n",
    "column_mapping.target = target\n",
    "column_mapping.prediction = prediction\n",
    "column_mapping.datetime = datetime\n",
    "column_mapping.numerical_features = numerical_features\n",
    "column_mapping.categorical_features = categorical_features"
   ]
  },
  {
   "attachments": {},
   "cell_type": "markdown",
   "metadata": {
    "id": "dhZOCJZ1wgel",
    "pycharm": {
     "name": "#%% md\n"
    }
   },
   "source": [
    "## Load Model (from the MLFlow)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "id": "PWJv-0UKwgen",
    "pycharm": {
     "name": "#%%\n"
    }
   },
   "outputs": [],
   "source": [
    "# Update MLFLOW_RUN_ID \n",
    "\n",
    "# Copy a Run ID from the \"Model Testing\" experiment\n",
    "MLFLOW_RUN_ID = '3fa3d32fbe214b8bb94c938e31e7cc57'\n",
    "\n",
    "# Set up MLFlow Client\n",
    "mlflow.set_tracking_uri(MLFLOW_TRACKING_URI)\n",
    "client = MlflowClient()\n",
    "print(f\"Client tracking uri: {client.tracking_uri}\")\n",
    "\n",
    "# Get \n",
    "model_path = mlflow.artifacts.download_artifacts(\n",
    "    run_id=MLFLOW_RUN_ID, \n",
    "    artifact_path='model.joblib'\n",
    ")\n",
    "print(\"Model path: \", model_path)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Load Model\n",
    "\n",
    "regressor = joblib.load(model_path)\n",
    "\n",
    "regressor"
   ]
  },
  {
   "attachments": {},
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Define dates for train and inference"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "id": "TZKZ_biHwgen",
    "pycharm": {
     "name": "#%%\n"
    }
   },
   "outputs": [],
   "source": [
    "# Define dates for train data\n",
    "train_dates = ('2011-01-02 00:00:00','2011-03-06 23:00:00')\n",
    "\n",
    "# Define dates for inference batches\n",
    "prediction_batches = [ \n",
    "    ('2011-03-07 00:00:00','2011-03-13 23:00:00'),\n",
    "    ('2011-03-14 00:00:00','2011-03-20 23:00:00'),\n",
    "    ('2011-03-21 00:00:00','2011-03-27 23:00:00'), \n",
    "]"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Define the Reference data"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Define the reference dataset\n",
    "reference_data = raw_data.loc[train_dates[0]:train_dates[1]]\n",
    "reference_prediction = regressor.predict(reference_data[FEATURE_COLUMNS])\n",
    "reference_data['prediction'] = reference_prediction\n",
    "\n",
    "print(reference_data.shape)"
   ]
  },
  {
   "attachments": {},
   "cell_type": "markdown",
   "metadata": {
    "id": "2rJyfTJ3wger",
    "pycharm": {
     "name": "#%% md\n"
    }
   },
   "source": [
    "# Monitor Model"
   ]
  },
  {
   "attachments": {},
   "cell_type": "markdown",
   "metadata": {
    "id": "7eBBRAOTwger",
    "pycharm": {
     "name": "#%% md\n"
    }
   },
   "source": [
    "### Week 1"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "id": "TqljYICLwger",
    "pycharm": {
     "name": "#%%\n"
    }
   },
   "outputs": [],
   "source": [
    "current_dates = prediction_batches[0]\n",
    "current_data = raw_data.loc[current_dates[0]:current_dates[1]]  \n",
    "\n",
    "print(current_data.shape)\n",
    "# current_data.head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "current_prediction = regressor.predict(current_data[numerical_features + categorical_features])\n",
    "current_data['prediction'] = current_prediction\n",
    "\n",
    "print(current_data.shape)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "id": "LvuPle_Nwges"
   },
   "outputs": [],
   "source": [
    "model_report = build_model_monitoring_report(\n",
    "    reference_data=reference_data.reset_index(drop=True),\n",
    "    current_data=current_data.reset_index(drop=True),\n",
    "    column_mapping=column_mapping,\n",
    ")\n",
    "\n",
    "model_metrics = get_model_monitoring_metrics(model_report)\n",
    "model_metrics"
   ]
  },
  {
   "attachments": {},
   "cell_type": "markdown",
   "metadata": {
    "id": "SbmG0hvSwges",
    "pycharm": {
     "name": "#%% md\n"
    }
   },
   "source": [
    "### Week 2"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "current_dates = prediction_batches[1]\n",
    "current_data = raw_data.loc[current_dates[0]:current_dates[1]]  \n",
    "\n",
    "current_prediction = regressor.predict(current_data[numerical_features + categorical_features])\n",
    "current_data['prediction'] = current_prediction\n",
    "\n",
    "print(current_dates)\n",
    "print(current_data.shape)\n",
    "# current_data.head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "id": "d6tBIZDUwget",
    "pycharm": {
     "name": "#%%\n"
    },
    "scrolled": false
   },
   "outputs": [],
   "source": [
    "model_report = build_model_monitoring_report(\n",
    "    reference_data=reference_data.reset_index(drop=True),\n",
    "    current_data=current_data.reset_index(drop=True),\n",
    "    column_mapping=column_mapping,\n",
    ")\n",
    "\n",
    "model_metrics = get_model_monitoring_metrics(model_report)\n",
    "model_metrics"
   ]
  },
  {
   "attachments": {},
   "cell_type": "markdown",
   "metadata": {
    "id": "q5lW24Xzwgex",
    "pycharm": {
     "name": "#%% md\n"
    }
   },
   "source": [
    "# Model Quality Evaluation (Prod)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "id": "I_IyYlM0wgey",
    "pycharm": {
     "name": "#%%\n"
    }
   },
   "outputs": [],
   "source": [
    "# Set up MLFlow Client\n",
    "mlflow.set_tracking_uri(MLFLOW_TRACKING_URI)\n",
    "client = MlflowClient()\n",
    "print(f\"Client tracking uri: {client.tracking_uri}\")\n",
    "\n",
    "# Set experiment name\n",
    "mlflow.set_experiment(\"Monitor Model\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "id": "bTHU8eAqwgez",
    "pycharm": {
     "name": "#%%\n"
    }
   },
   "outputs": [],
   "source": [
    "\n",
    "# Run model monitoring for each batch of dates\n",
    "for current_dates in prediction_batches:\n",
    "    \n",
    "    print(f\"Current batch dates: {current_dates}\") \n",
    "\n",
    "    # Start a new Run for the batch\n",
    "    with mlflow.start_run(run_name=current_dates[1]) as run: \n",
    "        \n",
    "        # Show newly created run metadata info\n",
    "        print(\"Experiment id: {}\".format(run.info.experiment_id))\n",
    "        print(\"Run id: {}\".format(run.info.run_id))\n",
    "        print(\"Run name: {}\".format(run.info.run_name))\n",
    "            \n",
    "        # Log parameters\n",
    "        mlflow.log_param(\"begin\", current_dates[0])\n",
    "        mlflow.log_param(\"end\", current_dates[1])\n",
    "        \n",
    "        # Make predictions for the current batch data\n",
    "        current_data = raw_data.loc[current_dates[0]:current_dates[1]]\n",
    "        current_prediction = regressor.predict(current_data[FEATURE_COLUMNS])\n",
    "        current_data['prediction'] = current_prediction\n",
    "\n",
    "        # Build the Model Monitoring report\n",
    "        model_report = build_model_monitoring_report(\n",
    "            reference_data=reference_data.reset_index(drop=True),\n",
    "            current_data=current_data.reset_index(drop=True),\n",
    "            column_mapping=column_mapping,\n",
    "        )\n",
    "        \n",
    "        # Log Metrics\n",
    "        model_metrics = get_model_monitoring_metrics(model_report)\n",
    "        mlflow.log_metrics(model_metrics)\n",
    "        \n",
    "        # Log Monitoring Report \n",
    "        monitoring_report_path = f\"../{REPORTS_DIR}/model_monitoring_report.html\"\n",
    "        model_report.save_html(monitoring_report_path)\n",
    "        mlflow.log_artifact(monitoring_report_path)\n",
    "        \n",
    "        print(run.info)"
   ]
  }
 ],
 "metadata": {
  "colab": {
   "provenance": []
  },
  "kernelspec": {
   "display_name": "Python 3 (ipykernel)",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.10.4"
  },
  "toc": {
   "base_numbering": 1,
   "nav_menu": {},
   "number_sections": true,
   "sideBar": true,
   "skip_h1_title": false,
   "title_cell": "Table of Contents",
   "title_sidebar": "Contents",
   "toc_cell": false,
   "toc_position": {},
   "toc_section_display": true,
   "toc_window_display": false
  }
 },
 "nbformat": 4,
 "nbformat_minor": 0
}
